Will build a one layer neural network that can detect gravitational waves.
LIGO whitens data by transforming into frequency domain and dividing the signal by the Amplitude spectrum density of it's interpolated form. This sets the whitened signal in units of sigma. Has the benefit of normalizing low and high frequencies.
LIGO bandpasses by using a two-way filter that cuts frequencies below 20Hz and above 300Hz.
## Initialize envornmental variables
# Standard python numerical analysis imports:
import numpy as np
import pandas as pd
from scipy import signal
from scipy.interpolate import interp1d
from scipy.signal import butter, filtfilt, iirdesign, zpk2tf, freqz
# the ipython magic below must be commented out in the .py file, since it doesn't work.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import h5py
# LIGO-specific readligo.py
import readligo as rl
def LIGO(nameH1, nameL1, nameNR, timeShift):
#----------------------------------------------------------------
# Load LIGO data from a single file
#----------------------------------------------------------------
# First from H1
fn_H1 = nameH1
strain_H1, time_H1, chan_dict_H1 = rl.loaddata(fn_H1, 'H1')
# and then from L1
fn_L1 = nameL1
strain_L1, time_L1, chan_dict_L1 = rl.loaddata(fn_L1, 'L1')
#event occurance for GW150914
#tevent = 1126259462.422 # Mon Sep 14 09:50:45 GMT 2015
tevent, shift = timeShift[0], timeShift[1]
# sampling rate:
fs = 4096
# both H1 and L1 will have the same time vector, so:
time = time_H1
# the time sample interval (uniformly sampled!)
dt = time[1] - time[0]
# read in the NR template
fn_NR = nameNR # 'data/GW150914_4_NR_waveform.txt'
NRtime, NR_H1 = np.genfromtxt(fn_NR).transpose()
#----------------------------------------------------------------
# Run all calculations
#----------------------------------------------------------------
# number of sample for the fast fourier transform:
NFFT = 1*fs
fmin = 10
fmax = 2000
Pxx_H1, freqs = mlab.psd(strain_H1, Fs = fs, NFFT = NFFT)
Pxx_L1, freqs = mlab.psd(strain_L1, Fs = fs, NFFT = NFFT)
# We will use interpolations of the ASDs (Pxx's) computed above for whitening:
psd_H1 = interp1d(freqs, Pxx_H1)
psd_L1 = interp1d(freqs, Pxx_L1)
# function to whiten data
def whiten(strain, interp_psd, dt):
Nt = len(strain)
freqs = np.fft.rfftfreq(Nt, dt)
# whitening: transform to freq domain, divide by asd, then transform back,
# taking care to get normalization right.
hf = np.fft.rfft(strain)
white_hf = hf / (np.sqrt(interp_psd(freqs) /dt/2.))
white_ht = np.fft.irfft(white_hf, n=Nt)
return white_ht
# now whiten the data from H1 and L1, and also the NR template:
strain_H1_whiten = whiten(strain_H1,psd_H1,dt)
strain_L1_whiten = whiten(strain_L1,psd_L1,dt)
NR_H1_whiten = whiten(NR_H1,psd_H1,dt)
# number of sample for the fast fourier transform:
NFFT = 1*fs
fmin = 10
fmax = 2000
Pxx_H1_whiten, freqs = mlab.psd(strain_H1_whiten, Fs = fs, NFFT = NFFT)
Pxx_L1_whiten, freqs = mlab.psd(strain_L1_whiten, Fs = fs, NFFT = NFFT)
# We will use interpolations of the ASDs computed above for whitening:
psd_H1 = interp1d(freqs, Pxx_H1_whiten)
psd_L1 = interp1d(freqs, Pxx_L1_whiten)
# We need to suppress the high frequencies with some bandpassing:
bb, ab = butter(4, [20.*2./fs, 300.*2./fs], btype='band')
strain_H1_whitenbp = filtfilt(bb, ab, strain_H1_whiten)
strain_L1_whitenbp = filtfilt(bb, ab, strain_L1_whiten)
NR_H1_whitenbp = filtfilt(bb, ab, NR_H1_whiten)
# plot the data after whitening:
# first, shift L1 by 7 ms, and invert. See the GW150914 detection paper for why!
strain_L1_shift = -np.roll(strain_L1_whitenbp,int(0.007*fs))
#----------------------------------------------------------------
# Pandas Dataframes
#----------------------------------------------------------------
# Time domain for strain data
S2_dict = {'time': time_H1, 'strain_L1': strain_L1, 'strain_H1': strain_H1,
'strain_L1_whiten': strain_L1_whiten, 'strain_H1_whiten': strain_H1_whiten,
'strain_H1_whitenbp': strain_H1_whitenbp, 'strain_L1_whitenbp': strain_L1_shift }
S2 = pd.DataFrame(data=S2_dict)
S2['time'] = pd.to_datetime(S2['time'],unit='s', origin=pd.Timestamp('1980-01-06'))
# Comparison waveform
NR_dict = {'NRtime': NRtime, 'NR_Waveform': NR_H1_whitenbp}
NR = pd.DataFrame(data=NR_dict)
NR['time'] = pd.to_datetime(tevent+NR['NRtime']+shift,unit='s', origin=pd.Timestamp('1980-01-06'))
# Frequency domain
F2_dict = {'freq': freqs, 'Pxx_H1': np.sqrt(Pxx_H1), 'Pxx_L1': np.sqrt(Pxx_L1),
'Pxx_H1_whiten': np.sqrt(Pxx_H1_whiten), 'Pxx_L1_whiten': np.sqrt(Pxx_L1_whiten)}
F2 = pd.DataFrame(data=F2_dict)
return(S2, F2, NR)
# Call the function.
# Returns whitened data in time domain (S2), frequency domain (F2), and matched waveform
# Event 2 copies time and waveform of event 1. Simply for calling functions
fn_NR = 'data/GW150914_4_NR_waveform.txt'
fn_H1V1 = 'data/H-H1_LOSC_4_V1-1126257414-4096.hdf5'
fn_L1V1 = 'data/L-L1_LOSC_4_V1-1126257414-4096.hdf5'
teventV1 = 1126259462.422
shiftV1 = 0.002
S2V1, F2V1, NRV1 = LIGO(fn_H1V1, fn_L1V1, fn_NR, [teventV1,shiftV1])
fn_H1V2 = 'data/H-H1_LOSC_4_V2-1135136228-4096.hdf5'
fn_L1V2 = 'data/L-L1_LOSC_4_V2-1135136228-4096.hdf5'
teventV2 = 1126259462.422
shiftV2 = 0.002
S2V2, F2V2, NRV2 = LIGO(fn_H1V2, fn_L1V2, fn_NR, [teventV2,shiftV2])
def plotEvent(S2, NR, tevent, columns):
# plot +- .5 seconds around the event:
#tevent = 1126259462.422 # Mon Sep 14 09:50:45 GMT 2015
#tevent = pd.to_datetime(tevent, unit='s', origin=pd.Timestamp('1980-01-06'))
deltat = .05 # seconds around the event
deltat = pd.Timedelta(seconds=deltat)
# index into the strain time series for this time interval:
S2_windowed = S2[(S2['time'] > tevent-2*deltat) & (S2['time'] < tevent+deltat)]
NR_windowed = NR[(NR['time'] > tevent-2*deltat) & (NR['time'] < tevent+deltat)]
fig, axes = plt.subplots(figsize=(20,12), nrows=1, ncols=1)
axes.plot(S2_windowed['time'], S2_windowed[columns[0]],'r',label=columns[0])
axes.plot(S2_windowed['time'], S2_windowed[columns[1]],'g',label=columns[2])
axes.plot(NR_windowed['time'],NR_windowed[columns[2]],'k',label=columns[2])
axes.set_title('time series of GW whitened and bandpassed ~0.05 seconds around event')
teventV1 = 1126259462.422
teventV1 = pd.to_datetime(teventV1, unit='s', origin=pd.Timestamp('1980-01-06'))
cols = ['strain_H1_whitenbp', 'strain_L1_whitenbp', 'NR_Waveform']
plotEvent(S2V1, NRV1, teventV1, cols)
Convolving parts of the characteristic shape of the waveform with a signal should produce a spike at the occurence.
Gravitational waves can have different frequencies based on the inspiral velocity
Therefore add noise to the windows to catch shape of GW.
Psuedo-Neural Network constructed of essentially one layer
# example from scipi
from scipy import signal
sig = np.repeat([0., 1., 0.], 100)
win = signal.hann(50)
#filtered = signal.convolve(sig, win, mode='same') / sum(win)
filtered = signal.correlate(sig, win, mode='same') / len(win)
fig, (ax_orig, ax_win, ax_filt) = plt.subplots(figsize=(20,18), nrows=3, ncols=1, sharex=True)
ax_orig.plot(sig)
ax_orig.set_title('Original pulse')
ax_orig.margins(0, 0.1)
ax_win.plot(win)
ax_win.set_title('Filter impulse response')
ax_win.margins(0, 0.1)
ax_filt.plot(filtered)
ax_filt.set_title('Filtered signal')
ax_filt.margins(0, 0.1)
fig.tight_layout()
fig.show()
C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\figure.py:403: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure "matplotlib is currently using a non-GUI backend, "
tevent = 1126259462.422
tevent = pd.to_datetime(tevent, unit='s', origin=pd.Timestamp('1980-01-06'))
deltat = .02 # seconds around the event
deltat = pd.Timedelta(seconds=deltat)
sig = S2V1['strain_H1_whitenbp']
NR_windowed1 = NRV1[(NRV1['time'] > tevent-2*deltat) & (NRV1['time'] < tevent+deltat)]
NR_windowed2 = NRV1[(NRV1['time'] > tevent-3*deltat) & (NRV1['time'] < tevent)]
NR_windowed3 = NRV1[(NRV1['time'] > tevent-0.5*deltat) & (NRV1['time'] < tevent+deltat)]
win1 = NR_windowed1['NR_Waveform']
win2 = NR_windowed2['NR_Waveform']
win3 = NR_windowed3['NR_Waveform']
fig, (ax1, ax2, ax3) = plt.subplots(figsize=(20,18), nrows=3, ncols=1)
ax1.plot(win1)
ax1.set_title('Filter impulse response')
ax2.plot(win2)
ax2.set_title('Filter impulse response')
ax3.plot(win3)
ax3.set_title('Filter impulse response')
<matplotlib.text.Text at 0x16a805246d8>
First convolution with three windows
Trim 2 seconds around transformed signal due to behavior of fourier transform at ends of signal
tevent = 1126259462.422
tevent = pd.to_datetime(tevent, unit='s', origin=pd.Timestamp('1980-01-06'))
deltat = .02 # seconds around the event
deltat = pd.Timedelta(seconds=deltat)
sig = S2V1['strain_H1_whitenbp'] #.clip(-4,4) # threshold strain to respectable values
NR_windowed1 = NRV1[(NRV1['time'] > tevent-2*deltat) & (NRV1['time'] < tevent+deltat)]
NR_windowed2 = NRV1[(NRV1['time'] > tevent-3*deltat) & (NRV1['time'] < tevent)]
NR_windowed3 = NRV1[(NRV1['time'] > tevent-0.5*deltat) & (NRV1['time'] < tevent+deltat)]
win1 = NR_windowed1['NR_Waveform']
win2 = NR_windowed2['NR_Waveform']
win3 = NR_windowed3['NR_Waveform']
# trim due to conversion back to time domain, funny stuff with ends
trim = 4096 * 2
sig = sig[trim:-trim]
time_idx = S2V1['time'][trim:-trim]
convolved1 = signal.correlate(sig, win1, mode='same') / sum(win1)
convolved2 = signal.correlate(sig, win2, mode='same') / sum(win2)
convolved3 = signal.correlate(sig, win3, mode='same') / sum(win3)
S2_dict = {'time': time_idx, 'original_strain_H1': sig,
'convolved1_strain_H1': convolved1, 'convolved2_strain_H1': convolved2, 'convolved3_strain_H1': convolved3}
S2 = pd.DataFrame(data=S2_dict)
S2['Convolved_Agg'] = np.add(np.add(S2['convolved1_strain_H1'], S2['convolved1_strain_H1']), S2['convolved1_strain_H1'])
S2_windowed = S2[(S2['time'] > tevent-2*deltat) & (S2['time'] < tevent+deltat)]
fig, ax = plt.subplots(figsize=(20,36), nrows=7, ncols=1, sharex=False)
ax[0].plot(S2_windowed['original_strain_H1'])
ax[0].set_title('Original pulse')
ax[1].plot(win1)
ax[1].set_title('Filter impulse response window1')
ax[2].plot(S2_windowed['convolved1_strain_H1'])
ax[2].set_title('Filtered signal with window1')
ax[3].plot(win2)
ax[3].set_title('Filter impulse response window2')
ax[4].plot(S2_windowed['convolved2_strain_H1'])
ax[4].set_title('Filtered signal with window2')
ax[5].plot(win3)
ax[5].set_title('Filter impulse response window3')
ax[6].plot(S2_windowed['convolved3_strain_H1'])
ax[6].set_title('Filtered signal with window3')
<matplotlib.text.Text at 0x16a807f07f0>
def window_transform(win, i=0, fs=4096):
#fs = 4096
idx = np.arange(0,len(win), 1)
cosx = np.cos(idx*i/fs*np.pi)
fourier_win = np.fft.rfft(win) * np.fft.rfft(cosx)
inverse_win = np.fft.ifft(fourier_win)
return(inverse_win)
def plot_inverse(win, win_tr, i, fs=4096):
fourier_win = np.fft.rfft(win)
fourier_win_tr = np.fft.rfft(win_tr)
fig, ax = plt.subplots(figsize=(20,20), nrows=4, ncols=1, sharex=False)
ax[0].plot(fourier_win)
ax[0].set_title('Frequency domain of original window ')
ax[1].plot(fourier_win_tr)
ax[1].set_title('Frequency domain of transformed window with cos(' + str(i) + ' np.pi/' + str(fs) + ')')
ax[2].plot(win)
ax[2].set_title('Time domain of original window')
ax[3].plot(win_tr)
ax[3].set_title('Time domain of transformed window')
def plot_transformed_windows(win, win_tr, i, fs=4096):
fourier_win = np.fft.rfft(win)
fourier_win_tr = np.fft.rfft(win_tr)
win_tr = np.fft.ifft(fourier_win_tr)
fig, ax = plt.subplots(figsize=(12,6), nrows=1, ncols=3, sharex=False)
ax[0].plot(fourier_win_tr)
ax[0].set_title('Frequency domain of transformed window with cos(' + str(i) + ' np.pi/' + str(fs) + ')')
ax[1].plot(win)
ax[2].plot(win_tr, color='red')
ax[2].set_title('Time domain of transformed window with cos(' + str(i) + ' np.pi/' + str(fs) + ')')
def signal_convolve(sig, window):
# trim due to conversion back to time domain, funny stuff with ends
trim = 4096 * 2
trimmed_sig = sig[trim:-trim]
convolved = signal.correlate(trimmed_sig, window, mode='same') / sum(window)
return(convolved)
plt.rcParams['agg.path.chunksize'] = 20000
sig = S2V1['strain_H1_whitenbp'] #.clip(-4,4) # threshold strain to respectable values
win1 = NR_windowed1['NR_Waveform']
time_idx = S2V1['time']
fig, ax = plt.subplots(figsize=(20,12), nrows=1, ncols=1)
convolved_signal = signal_convolve(sig, win1)
ax.plot(convolved_signal)
ax.set_title('signal1 convolved with window1')
<matplotlib.text.Text at 0x16a80998828>
for i in range(1,37,2):
fs = 5
win1_tr = window_transform(win1, i, fs)
plot_transformed_windows(win1, win1_tr, i, fs)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\fft\fftpack.py:370: ComplexWarning: Casting complex values to real discards the imaginary part a = array(a, copy=True, dtype=float) C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\numeric.py:531: ComplexWarning: Casting complex values to real discards the imaginary part return array(a, dtype, copy=False, order=order)
def first_layer(sig, windows, n_freqs, fs):
'''
first layer, calculate convolution of signal with three windows
'''
cosmic_convolution = []
for win_i in windows:
for i in n_freqs:
transformed_i = window_transform(win_i, i, fs)
convolved_signal = signal_convolve(sig, transformed_i)
cosmic_convolution.append(convolved_signal)
return(cosmic_convolution)
def output_layer(cosmic_convolution):
'''
output layer, average
'''
cosmic_convolution_mean = np.average(cosmic_convolution, axis=0)
return(cosmic_convolution_mean)
sig1 = S2V1['strain_H1_whitenbp']
sig2 = S2V2['strain_H1_whitenbp']
windows = [win1, win2, win3]
#n_freqs = np.arange(0,2048, 128)
fs = 5 # saw good performace with denominator 5, rather than 4096
n_freqs = np.arange(1,37,2) # every odd integer from 1 to 37
sig1_first_output = first_layer(sig1, windows, n_freqs, fs)
sig1_second_output = output_layer(sig1_first_output)
sig2_first_output = first_layer(sig2, windows, n_freqs, fs)
sig2_second_output = output_layer(sig2_first_output)
C:\ProgramData\Anaconda3\lib\site-packages\scipy\signal\signaltools.py:789: ComplexWarning: Casting complex values to real discards the imaginary part return out.astype(volume.dtype)
plt.rcParams['agg.path.chunksize'] = 20000
fig, ax = plt.subplots(figsize=(20,20), nrows=2, ncols=1, sharex=False)
ax[0].plot(sig1_second_output)
ax[0].set_title('First signal NN output')
ax[1].plot(sig2_second_output)
ax[1].set_title('Second signal NN output')
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\numeric.py:531: ComplexWarning: Casting complex values to real discards the imaginary part return array(a, dtype, copy=False, order=order)
<matplotlib.text.Text at 0x16ab0498080>
Map the index of the max value from output to original signal. Plot near event and overlay with best window to see if it looks like a GW.
Output layer averages windows with the same weight. Use MLE to add weight to windows that give higher values around known events.
Parameterize shapes of windows (simply picked $cos(i*\pi/5)$). Kill windows that don't offer improvement